

library(manifestoR)
library(tidyverse)
library(tidytext)
library(data.table)
library(dplyr)


# start from a clean workspace: removes every object in the global environment
rm(list = ls())

# working directory; all relative file names below resolve against it
setwd("~/manifestos")


###################
# Download corpus #
###################


# register the API key, then list the available manifestos, keeping only
# those flagged as annotated and dropping duplicated rows
mp_setapikey("manifesto_apikey.txt")
avail <- mp_availability(TRUE) %>%
  filter(annotations == TRUE) %>%
  distinct()
#fwrite(avail, file = "available data.csv")


# download corpora, one availability row at a time, and keep them in a list.
# A list (instead of one global `corpus_<i>` object per row fetched back with
# ls()/get()) keeps every corpus tied to its manifesto ID by position and does
# not depend on which other objects happen to match "corpus_" in the workspace.
# seq_len() also yields an empty loop when `avail` has no rows.
corpora <- lapply(seq_len(nrow(avail)), function(i) mp_corpus(avail[i, ]))


# extract the manifesto ID of the first document of each corpus;
# NA marks a corpus that is empty or carries no ID
manifesto_ids <- vapply(corpora, function(corpus) {
  id <- tryCatch(corpus[[1]]$meta$manifesto_id, error = function(e) NULL)
  if (is.null(id) || length(id) != 1 || is.na(id)) {
    NA_character_
  } else {
    as.character(id)
  }
}, character(1))

# a corpus without an ID cannot be labelled; drop it explicitly (with a
# warning) rather than letting the remaining IDs shift onto the wrong texts
has_id <- !is.na(manifesto_ids)
if (!all(has_id)) {
  warning(sum(!has_id), " corpora without a manifesto ID were dropped",
          call. = FALSE)
}
corpora <- corpora[has_id]
manifesto_ids <- manifesto_ids[has_id]

if (length(corpora) == 0) {
  stop("no annotated manifestos could be downloaded", call. = FALSE)
}


# convert corpora into sentence-level data frames, stack them, and keep the
# manifesto ID (taken from the list names) plus text, code and position
frames <- lapply(corpora, as.data.frame)
names(frames) <- manifesto_ids
frames <- bind_rows(frames, .id = "manifesto_id")
frames <- frames[, c("manifesto_id", "text", "cmp_code", "pos")]

rm(corpora)


# policy domain = first character of cmp_code converted to a number;
# codes whose first character is not a digit (or that are missing) become 0
frames <- frames %>%
  mutate(domain = coalesce(as.numeric(substr(cmp_code, 1, 1)), 0))


# add language of manifesto:
# rebuild the availability table as a plain data frame with one row per
# distinct (party, date, language), with both keys stored as character so
# they match the keys split out of manifesto_id below
mp_setapikey("manifesto_apikey.txt")
avail <- mp_availability(TRUE) %>%
  filter(annotations == TRUE) %>%
  as.data.frame() %>%
  select(party, date, language) %>%
  mutate(party = as.character(party), date = as.character(date)) %>%
  distinct()

# manifesto_id has the form "<party>_<date>"; split it into the two join keys
frames <- separate(frames, manifesto_id, into = c("party", "date"), sep = "_")

# keep only sentences whose (party, date) appears in the availability table
frames <- merge(frames, avail, by = c("party", "date"))


# export
fwrite(frames, file = "corpus.csv")




###############################################################################
# merge meta data, implement sample restrictions, and add treatment variables #
###############################################################################

library(data.table)
library(dplyr)
library(haven)
library(tidyr)
library(stringr)

# second stage starts from a clean workspace and the exported corpus
rm(list = ls())

corpus <- fread("corpus.csv")

# get meta data and add an election identifier ("<countryname>_<edate>")
meta <- read_dta("MPDataset_MPDS2023a_stata14.dta") %>%
  mutate(elect_id = paste(countryname, as.character(edate), sep = "_"))

# merge corpus and meta; rows without a (party, date) match on both sides
# are dropped
combined <- corpus %>%
  inner_join(meta, by = c("party", "date"))

# for now: keep only observations with date up to 201812
combined <- combined[combined$date <= 201812, ]

# drop if vote share is missing
combined <- drop_na(combined, pervote)

# average vote share per party in the sample
# NOTE(review): `combined` has one row per sentence at this point, so this
# mean is weighted by the number of sentences of each manifesto rather than
# being a plain average over elections - confirm this is intended.
avg_vote <- combined %>%
  group_by(party) %>%
  summarise(name = mean(pervote))

# exclude parties with less than 5% average vote share, then drop the helper
# column again
combined <- combined %>%
  inner_join(avg_vote, by = "party")
combined <- combined[combined$name >= 5, ]
combined <- subset(combined, select = -name)

# create policy domain categories from cmp_code.
# Every sentence starts as "other" and is relabelled when its code belongs to
# one of the two code sets below. Membership is tested against cmp_code for
# every listed code (the earlier version compared half of the codes against
# the `category` column, which is still "other" at that point, so those codes
# were never classified). %in% also treats a missing cmp_code as "no match".
multicult_codes <- c("607", "607.1", "607.2", "607.3", "6071", "6072",
                     "608", "608.1", "608.2", "608.3", "6081")
combo_codes <- c("503", "504", "5041", "505", "506", "5061", "507")

# compare as text so the result does not depend on whether fread() parsed the
# column as character or numeric
cmp_code_chr <- as.character(combined$cmp_code)

combined$category <- "other"
combined$category[cmp_code_chr %in% multicult_codes] <- "multicult"
combined$category[cmp_code_chr %in% combo_codes] <- "combo"

rm(cmp_code_chr)


# create empty text cells in cases where a manifesto does not address a given category
# A manifesto is identified by "<party>_<date>_<country>". complete() adds one
# filler row for every manifesto x category combination that is absent.
# partyname and countryname are carried along with the manifesto key via
# nesting(): without that the filler rows get NA in both columns, and the
# treatment assignments further down (which match on partyname and
# countryname) can never flag the filler rows of a treated manifesto.
# NOTE(review): this assumes partyname/countryname are constant within a
# manifesto key - confirm against the meta data.
combined$party_date_country <- paste(combined$party, combined$date,
                                     combined$country, sep = "_")
combined <- tidyr::complete(
  combined,
  tidyr::nesting(party_date_country, partyname, countryname),
  category
)
combined <- as.data.frame(combined)

# party, date and country are NA on the filler rows; rebuild them for all
# rows from the manifesto key and convert back to numbers
combined <- subset(combined, select = -c(party, date, country))
combined[c("party", "date", "country")] <-
  str_split_fixed(combined$party_date_country, "_", 3)
combined$party <- as.double(combined$party)
combined$date <- as.double(combined$date)
combined$country <- as.double(combined$country)

# placeholder content for the filler rows
combined <- combined %>%
  replace_na(list(pos = 1, text = ".", language = "english"))


# elections / manifestos are treated when the *incumbent* is a populist
# these lines are based on the data compiled by Funke et al. (2023) and publicly available information
#
# Each case below is list(date, partyname, countryname). A row of `combined`
# is flagged (set to 1) when all three values match; everything else stays 0.

# treated cases (right-wing populists)
right_wing <- list(
  # Menem 1989-1999
  list(199505, "Justicialist Party", "Argentina"),
  list(199910, "Justicialist Coalition for Change", "Argentina"),
  # Borisov 2009-2013; 2014-2017; 2017-2018
  list(201305, "Citizens for European Development of Bulgaria", "Bulgaria"),
  list(201703, "Citizens for European Development of Bulgaria", "Bulgaria"),
  # Orban 2010-2018
  list(201404,
       "Alliance of Federation of Young Democrats - Hungarian Civic Union - Christian Democratic People's Party",
       "Hungary"),
  list(201804,
       "Alliance of Federation of Young Democrats - Christian Democratic People's Party",
       "Hungary"),
  # Netanyahu 1996-1999; 2009-2018
  list(199905, "The Consolidation", "Israel"),
  list(201301, "The Consolidation", "Israel"),
  list(201503, "The Consolidation", "Israel"),
  # Berlusconi 2001-2006; 2008-2011
  list(200604, "Go Italy", "Italy"),
  # Muldoon 1975-1984
  list(197811, "New Zealand National Party", "New Zealand"),
  list(198111, "New Zealand National Party", "New Zealand"),
  list(198407, "New Zealand National Party", "New Zealand"),
  # Kaczynskis 2005-2007; 2015-2018
  list(200710, "Law and Justice", "Poland"),
  # Meciar 1990-1991; 1992-1994; 1994-1998
  list(199409, "Movement for a Democratic Slovakia", "Slovakia"),
  list(199809, "Movement for a Democratic Slovakia", "Slovakia"),
  # Erdogan 2003-2018
  list(200707, "Justice and Development Party", "Turkey"),
  list(201106, "Justice and Development Party", "Turkey"),
  list(201506, "Justice and Development Party", "Turkey"),
  list(201511, "Justice and Development Party", "Turkey"),
  list(201806, "Justice and Development Party", "Turkey")
)

# treated cases (left-wing populists)
left_wing <- list(
  # Kirchner 2003-2007
  list(200710, "Front for Victory", "Argentina"),
  # Fernandez 2007-2015
  list(200906, "Front for Victory", "Argentina"),
  list(201110, "Front for Victory", "Argentina"),
  list(201310, "Front for Victory", "Argentina"),
  list(201510, "Front for Victory", "Argentina"),
  # Morales 2006-2018
  list(200912,
       "Movement towards Socialism - Political Instrument for the Sovereignty of the Peoples",
       "Bolivia"),
  list(201410,
       "Movement towards Socialism - Political Instrument for the Sovereignty of the Peoples",
       "Bolivia"),
  # Correa 2007-2017
  list(200904, "Proud and Sovereign Fatherland Alliance Movement", "Ecuador"),
  list(201302, "Proud and Sovereign Fatherland Alliance Movement", "Ecuador"),
  list(201702, "Proud and Sovereign Fatherland Alliance Movement", "Ecuador"),
  # Tsipras 2015-2018
  list(201509, "Coalition of the Radical Left", "Greece"),
  # Fico 2006-2010; 2012-2018
  list(201006, "Direction-Social Democracy", "Slovakia"),
  list(201603, "Direction-Social Democracy", "Slovakia"),
  # Zuma 2009-2018
  list(201405, "African National Congress", "South Africa")
)

# treatment variable (right-wing populists)
combined$pop_treat_right <- 0
for (case in right_wing) {
  hit <- combined$date == case[[1]] &
    combined$partyname == case[[2]] &
    combined$countryname == case[[3]]
  combined$pop_treat_right[hit] <- 1
}

# treatment variable (left-wing populists)
combined$pop_treat_left <- 0
for (case in left_wing) {
  hit <- combined$date == case[[1]] &
    combined$partyname == case[[2]] &
    combined$countryname == case[[3]]
  combined$pop_treat_left[hit] <- 1
}

# the case lists and loop helpers are not needed afterwards
rm(right_wing, left_wing, case, hit)

# keep only the variables needed downstream and write them to disk
combined <- combined %>%
  select(country, party, date, category,
         pos, text, pop_treat_left, pop_treat_right)

fwrite(combined, file = "corpus_with_vars.csv")









#################################################################################################
#R version 4.3.0 (2023-04-21 ucrt)
#Platform: x86_64-w64-mingw32/x64 (64-bit)
#Running under: Windows 10 x64 (build 19045)
#
#Matrix products: default
#
#
#locale:
#  [1] LC_COLLATE=English_United States.utf8  LC_CTYPE=English_United States.utf8    LC_MONETARY=English_United States.utf8
#[4] LC_NUMERIC=C                           LC_TIME=English_United States.utf8    
#
#time zone: Europe/Stockholm
#tzcode source: internal
#
#attached base packages:
#  [1] parallel  stats     graphics  grDevices utils     datasets  methods   base     
#
#other attached packages:
#  [1] syuzhet_1.0.6           SentimentAnalysis_1.3-4 haven_2.5.2             data.table_1.14.8      
#[5] tidytext_0.4.1          lubridate_1.9.2         forcats_1.0.0           stringr_1.5.0          
#[9] dplyr_1.1.2             purrr_1.0.1             readr_2.1.4             tidyr_1.3.0            
#[13] tibble_3.2.1            ggplot2_3.4.2           tidyverse_2.0.0         manifestoR_1.5.0       
#[17] tm_0.7-11               NLP_0.2-1              
#
#loaded via a namespace (and not attached):
#  [1] janeaustenr_1.0.0 utf8_1.2.3        generics_0.1.3    xml2_1.3.3        slam_0.1-50       stringi_1.7.12   
#[7] lattice_0.21-8    hms_1.1.3         digest_0.6.31     magrittr_2.0.3    timechange_0.2.0  grid_4.3.0       
#[13] fastmap_1.1.1     Matrix_1.5-4      jsonlite_1.8.4    fansi_1.0.4       scales_1.2.1      mnormt_2.1.1     
#[19] cli_3.6.1         rlang_1.1.0       tokenizers_0.3.0  munsell_0.5.0     base64enc_0.1-3   withr_2.5.0      
#[25] tools_4.3.0       tzdb_0.3.0        colorspace_2.1-0  DT_0.27           vctrs_0.6.2       R6_2.5.1         
#[31] zoo_1.8-12        lifecycle_1.0.3   htmlwidgets_1.6.2 psych_2.3.3       pkgconfig_2.0.3   pillar_1.9.0     
#[37] gtable_0.3.3      glue_1.6.2        Rcpp_1.0.10       tidyselect_1.2.0  rstudioapi_0.14   SnowballC_0.7.1  
#[43] htmltools_0.5.5   nlme_3.1-162      functional_0.6    compiler_4.3.0   



